import requests
from bs4 import BeautifulSoup
import pandas as pd


# 定义爬取数据的函数
def crawl_links(url, timeout=10):
    """Fetch *url* and return the href/text of every ``<a>`` tag on the page.

    Parameters
    ----------
    url : str
        Address of the page to scrape.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 10). Without a
        timeout, ``requests.get`` can block indefinitely on a stalled host.

    Returns
    -------
    list[dict]
        One dict per anchor tag, with keys '链接' (the raw ``href``
        attribute, which may be ``None`` when the tag has no href) and
        '文本' (the tag's text with surrounding whitespace stripped).

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status (via raise_for_status).
    requests.Timeout
        If no response arrives within *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Force UTF-8 decoding regardless of the Content-Type header's charset.
    # NOTE(review): assumes the target pages really are UTF-8 — confirm.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    links_and_texts = []
    for a_tag in soup.find_all('a'):
        href = a_tag.get('href')  # None for <a> tags without an href attribute
        text = a_tag.get_text(strip=True)
        links_and_texts.append({'链接': href, '文本': text})
    return links_and_texts


# 定义要写入的Excel文件名
def main():
    """Crawl two sites and write their links/texts into a two-sheet workbook."""
    # Name of the Excel workbook to create.
    excel_file = 'links_and_texts.xlsx'

    # Crawl links and anchor text from the first site.
    url1 = 'https://www.jd.com/?cu=true'  # replace with the actual first URL
    links_and_texts_sheet1 = crawl_links(url1)

    # Crawl links and anchor text from the second site.
    url2 = 'http://www.qingdao.gov.cn/zwgk/xxgk/'  # replace with the actual second URL
    links_and_texts_sheet2 = crawl_links(url2)

    # One DataFrame per site; each goes to its own sheet.
    df1 = pd.DataFrame(links_and_texts_sheet1)
    df2 = pd.DataFrame(links_and_texts_sheet2)

    # The context manager guarantees the workbook is finalized and closed
    # even if a write fails partway through.
    with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer:
        df1.to_excel(writer, sheet_name='Sheet1', index=False)
        df2.to_excel(writer, sheet_name='Sheet2', index=False)

    print(f'数据已写入 {excel_file}')


# Guard the entry point so importing this module does not trigger the
# network crawl and file write as side effects.
if __name__ == '__main__':
    main()